# coding=utf8

import word2vec
import logging
logger = logging.getLogger('debug')

import preprocess

# Corpus path handed to word2vec.word2vec() as the training input.
# The path is relative, so it resolves against the current working directory.
train_file = 'data/word2vec_train_data.txt'
# Path the trained vectors are written to by train_word2vec() and read back
# from by test() via word2vec.load().
vector_file = 'data/vector_data.txt'
# Passed as the `size` argument of word2vec.word2vec()
# (presumably the embedding dimensionality -- confirm against the library docs).
VECTOR_SIZE = 300


def train_word2vec():
    """Train word2vec on ``train_file`` and write the result to ``vector_file``.

    Reads the module-level ``train_file``, ``vector_file`` and ``VECTOR_SIZE``
    at call time and forwards them to ``word2vec.word2vec`` with ``binary=0``
    and ``verbose=False``. Nothing is returned; the output is whatever the
    library writes to ``vector_file``.
    """
    # Status message (Chinese for "training word2vec...").  The parenthesised
    # single-argument form prints the same text on Python 2 and, unlike the
    # bare print statement, is also valid syntax on Python 3.
    print('进行word2vec的训练...')
    word2vec.word2vec(
        train_file,
        vector_file,
        size=VECTOR_SIZE,
        binary=0,  # NOTE(review): presumably selects text output -- confirm in library docs
        verbose=False,
    )


def test():
    """Smoke-check the trained vectors by querying the word ``'the'``.

    Loads ``vector_file`` with ``word2vec.load``, calls ``model.cosine('the')``
    and prints the list built from ``model.generate_response``. Nothing is
    returned. Whatever ``word2vec.load`` or ``model.cosine`` raise (for
    example when the vector file is missing) propagates to the caller.
    """
    model = word2vec.load(vector_file)
    # NOTE(review): presumably the indexes/scores of the words closest to
    # 'the' by cosine similarity -- confirm against the word2vec package docs.
    neighbour_indexes, neighbour_metrics = model.cosine('the')
    response = model.generate_response(neighbour_indexes, neighbour_metrics)
    # Parenthesised print: same output on Python 2, valid syntax on Python 3.
    print(response.tolist())


def train():
    """Run the full training pipeline, one stage after another.

    The stages, in order, are ``preprocess.connect_entity_words``,
    ``preprocess.get_pure_train_data`` and ``train_word2vec``. Each is called
    with no arguments and its return value is ignored.
    """
    pipeline = (
        preprocess.connect_entity_words,
        preprocess.get_pure_train_data,
        train_word2vec,
    )
    # Order matters: preprocessing runs before the word2vec training step.
    for stage in pipeline:
        stage()

